#################################
#################################
#################################
## Replication files for: #######
## When Is Gender on Party Agendas? 
## Weeks et al. 
## South European Society and Politics
## 2024 
#################################
#################################
#################################

# Clear the global environment so the script starts from a clean workspace.
rm(list = ls())

# Plotting and colour helpers
library("ggplot2")
library("directlabels")
library("RColorBrewer")
library("colorRamps")
# Palette generator interpolating the 9-colour "Spectral" Brewer palette.
getPalette <- colorRampPalette(brewer.pal(9, "Spectral"))

# Clustered variance matrices, coefficient tests, tables, data handling
library("multiwayvcov")
library("lmtest")
library("stargazer")
library("dplyr")

# Mixed models and interaction plots
library("lme4")
library("lmerTest")
library("interplot")

# Print numbers in fixed rather than scientific notation.
options(scipen = 999)

## Two datasets are used below: one at the party level, one at the sentence level.

### Party-level data first.
### The code below expects this .RData file to provide the data frame `test3`.

load(file = "party_level_cleaned_mar16.RData")
head(x = test3)
nrow(x = test3)

# Working copy used by the figures and main models; `test3` stays untouched.
test4 <- test3

##################
#### Figure 1
##################

# Scatter of the gender-related sentence share by year, labelled with party
# names, with a grey loess trend line.
set.seed(02138)
bypar <- ggplot(
  data = test4,
  mapping = aes(
    x = year,
    y = share_gender_all,
    label = partyname,
    fontface = "bold"
  )
)
bypar <- bypar +
  geom_point(size = 0.35, color = "black") +
  geom_smooth(method = "loess", color = "grey") +
  labs(
    x = "",
    y = "% Manifesto Sentences Focused on Gender-Related Interests",
    title = ""
  ) +
  # check_overlap drops labels that would overprint an earlier label
  geom_text(check_overlap = TRUE, size = 5) +
  theme_bw() +
  theme(
    # strip the frame and grid, keep plain black axis lines
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black"),
    # larger type throughout
    axis.title = element_text(size = 16),
    axis.text = element_text(size = 14),
    plot.title = element_text(size = 18),
    legend.text = element_text(size = 12)
  )

bypar

# Mean share before 2000 and after 2017
with(test4, mean(share_gender_all[year < 2000]))
with(test4, mean(share_gender_all[year > 2017]))

################################
### Figure 2
################################

# Party-family labels for plotting: "Radical Right" is shown as "Far-Right".
test4$partyfam2 <- test4$partyfam
test4$partyfam2[test4$partyfam == "Radical Right"] <- "Far-Right"
table(test4$partyfam2)

### Bar plot party family within country in greyscale

library(ggpubr)

# Set the order for partyfam2 levels.
# The original code referred to `test5` here and in the plot call, but `test5`
# is only created further down (Table 2) and never receives a `partyfam2`
# column; the recoded column lives on `test4`, so `test4` is used throughout.
test4$partyfam2 <- factor(
  test4$partyfam2,
  levels = c(
    "Communist / Left Lib.",
    "Social Dem.",
    "Liberal",
    "Conservative/Christian Dem.",
    "Far-Right"
  )
)

# Mean share of gender-related sentences per party family, one facet per country
p <- ggbarplot(
  test4,
  x = "partyfam2", y = "share_gender_all",
  add = "mean", fill = "countryname",
  facet.by = "countryname", x.text.angle = 90, xlab = FALSE, ylab = FALSE,
  legend.title = "", palette = "grey"
)

# Customize the theme: no frame or grid, black axis lines, larger text
p + theme(
  panel.border = element_blank(),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  axis.line = element_line(colour = "black"),
  axis.title = element_text(size = 17),
  axis.text = element_text(size = 17),
  plot.title = element_text(size = 18),
  legend.text = element_text(size = 14)
)

### How many parties are present across the data used in the models?
### Keep only rows that are complete on the listed model variables.
vars <- c(
  "share_gender_all", "comm_leftlib", "recession2", "femaleleader2",
  "cabinet_party2_lag", "year", "partyname", "countryname", "date"
)
testing <- na.omit(test3[vars])
nrow(testing)
table(testing$countryname)

# Party-by-election-date presence tables, one per country
table(
  testing$partyname[testing$countryname == "Portugal"],
  testing$date[testing$countryname == "Portugal"]
) ## 27 out of 42
table(
  testing$partyname[testing$countryname == "Spain"],
  testing$date[testing$countryname == "Spain"]
) ## 18 out of 40
table(
  testing$partyname[testing$countryname == "Greece"],
  testing$date[testing$countryname == "Greece"]
) ### 20
65 / 123
### 53 percent of observations are present over the whole time period


####################################################
###### Table 1  ##########
####################################################

# Every model below has the same right-hand side (party-family variables,
# controls, country indicators) and differs only in the outcome. After each
# fit, the coefficient tests are recomputed with a variance matrix clustered
# on `party`.

library(lmtest)

# Outcome: share_gender_all
lm1 <- lm(
  share_gender_all ~ comm_leftlib + socialdem + con_chdem + farright +
    recession2 + femaleleader2 + cabinet_party2_lag + year + total_sen +
    as.factor(countryname),
  data = test4
)
summary(lm1)
vcov_party1 <- cluster.vcov(model = lm1, cluster = test4$party)
lm1_se1 <- as.matrix(coeftest(lm1, vcov. = vcov_party1))
lm1_se1
nobs(lm1_se1) ### 123 obs in our final model

# Outcome: share_gender_eq
lm2 <- lm(
  share_gender_eq ~ comm_leftlib + socialdem + con_chdem + farright +
    recession2 + femaleleader2 + cabinet_party2_lag + year + total_sen +
    as.factor(countryname),
  data = test4
)
summary(lm2)
vcov_party2 <- cluster.vcov(model = lm2, cluster = test4$party)
lm1_se2 <- as.matrix(coeftest(lm2, vcov. = vcov_party2))
lm1_se2

# Outcome: share_workfam
lm3 <- lm(
  share_workfam ~ comm_leftlib + socialdem + con_chdem + farright +
    recession2 + femaleleader2 + cabinet_party2_lag + year + total_sen +
    as.factor(countryname),
  data = test4
)
summary(lm3)
vcov_party3 <- cluster.vcov(model = lm3, cluster = test4$party)
lm1_se3 <- as.matrix(coeftest(lm3, vcov. = vcov_party3))
lm1_se3

# Outcome: share_vaw
lm4 <- lm(
  share_vaw ~ comm_leftlib + socialdem + con_chdem + farright +
    recession2 + femaleleader2 + cabinet_party2_lag + year + total_sen +
    as.factor(countryname),
  data = test4
)
summary(lm4)
vcov_party4 <- cluster.vcov(model = lm4, cluster = test4$party)
lm1_se4 <- as.matrix(coeftest(lm4, vcov. = vcov_party4))
lm1_se4

# Outcome: share_reprights
lm5 <- lm(
  share_reprights ~ comm_leftlib + socialdem + con_chdem + farright +
    recession2 + femaleleader2 + cabinet_party2_lag + year + total_sen +
    as.factor(countryname),
  data = test4
)
summary(lm5)
vcov_party5 <- cluster.vcov(model = lm5, cluster = test4$party)
lm1_se5 <- as.matrix(coeftest(lm5, vcov. = vcov_party5))
lm1_se5

### Wald tests on the share_reprights model (lm5), each using the
### party-clustered variance matrix.
library(car)

# Far right vs. conservative / Christian democratic
hypothesis <- "farright - con_chdem = 0"
wald_test <- linearHypothesis(lm5, hypothesis, vcov. = vcov_party5)
print(wald_test) ## significant

# Far right vs. far left
hypothesis <- "farright - comm_leftlib = 0"
wald_test <- linearHypothesis(lm5, hypothesis, vcov. = vcov_party5)
print(wald_test) ### not significant

# Far right vs. social democratic
hypothesis <- "farright - socialdem = 0"
wald_test <- linearHypothesis(lm5, hypothesis, vcov. = vcov_party5)
print(wald_test) ### not significant

# Outcome: share_sexuality
lm6 <- lm(
  share_sexuality ~ comm_leftlib + socialdem + con_chdem + farright +
    recession2 + femaleleader2 + cabinet_party2_lag + year + total_sen +
    as.factor(countryname),
  data = test4
)
summary(lm6)
vcov_party6 <- cluster.vcov(model = lm6, cluster = test4$party)
lm1_se6 <- as.matrix(coeftest(lm6, vcov. = vcov_party6))
lm1_se6

# Outcome: gender_pos
lm7 <- lm(
  gender_pos ~ comm_leftlib + socialdem + con_chdem + farright +
    recession2 + femaleleader2 + cabinet_party2_lag + year + total_sen +
    as.factor(countryname),
  data = test4
)
summary(lm7)
vcov_party7 <- cluster.vcov(model = lm7, cluster = test4$party)
lm1_se7 <- as.matrix(coeftest(lm7, vcov. = vcov_party7))
lm1_se7


##########################
###### Figure 3 ##########
##########################

# Relabel "Radical Right" in place, then build an ordered factor that fixes
# the legend order of the party families.
table(test4$partyfam)
test4$partyfam[test4$partyfam == "Radical Right"] <- "Far-Right"

test4$partyfam2 <- factor(
  test4$partyfam,
  levels = c(
    "Communist / Left Lib.",
    "Social Dem.",
    "Liberal",
    "Conservative/Christian Dem.",
    "Far-Right"
  )
)

# Gender position over time, one loess line (no confidence band) per family
set.seed(02138)
bypar <- ggplot(
  data = test4,
  mapping = aes(x = year, y = gender_pos, color = partyfam2, shape = partyfam2)
)
bypar <- bypar +
  geom_point(aes(shape = partyfam2), size = 4) +
  geom_smooth(method = "loess", se = FALSE, size = 1) +
  labs(x = "", y = "Gender Position", title = "") +
  theme_bw()

# Greyscale colours plus a single theme() call. The settings are the net result
# of the stacked theme() calls this replaces: for repeated elements the later
# size wins and the bold title face is retained.
bypar +
  scale_colour_grey(start = 0.1, end = 0.7) +
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black"),
    legend.title = element_blank(),
    plot.title = element_text(size = 18, face = "bold"),
    legend.text = element_text(size = 14),
    axis.title = element_text(size = 17),
    axis.text = element_text(size = 17)
  )

############################
###### Table 2 #############
############################

### Drop far-right parties (rows with farright == 0 are kept)

test5 <- test3[which(test3$farright == 0), ]
nrow(test5)
## 124

### Use 3 party families. Per the authors' note, this is needed because radical
### right parties did not enter parliament in the years when liberal parties
### were strong in any of the countries, so liberal parties show no variation
### from before to after the radical right's entry.

table(test4$lag1_farright_vote[test4$partyfam == "Liberal"])

# Fold "Liberal" into "Conservative/Christian Dem."; the first factor level,
# "Communist / Left Lib.", is the reference category.
test5$partyfam3 <- replace(
  test5$partyfam,
  test5$partyfam == "Liberal",
  "Conservative/Christian Dem."
)
table(test5$partyfam3)
test5$partyfam3 <- factor(
  test5$partyfam3,
  levels = c("Communist / Left Lib.", "Conservative/Christian Dem.", "Social Dem.")
)

### run this to test impact on comm/left lib
## test5$partyfam3<-factor(test5$partyfam3, levels = c("Conservative/Christian Dem.","Communist / Left Lib.",  "Social Dem."))

# Each model interacts the lagged far-right vote with party family; coefficient
# tests are recomputed with a variance matrix clustered on `party`.

# Outcome: share_gender_all
lm10 <- lm(
  share_gender_all ~ lag1_farright_vote + partyfam3 +
    lag1_farright_vote:partyfam3 + recession2 + femaleleader2 +
    cabinet_party2_lag + year + total_sen + as.factor(countryname),
  data = test5
)
summary(lm10) ## null
vcov_party10 <- cluster.vcov(model = lm10, cluster = test5$party)
lm1_se10 <- as.matrix(coeftest(lm10, vcov. = vcov_party10))
lm1_se10 ## interaction sig at .05 and pos for social dem

# Outcome: share_gender_eq
lm11 <- lm(
  share_gender_eq ~ lag1_farright_vote + partyfam3 +
    lag1_farright_vote:partyfam3 + recession2 + femaleleader2 +
    cabinet_party2_lag + year + total_sen + as.factor(countryname),
  data = test5
)
summary(lm11) ## null
vcov_party11 <- cluster.vcov(model = lm11, cluster = test5$party)
lm1_se11 <- as.matrix(coeftest(lm11, vcov. = vcov_party11))
lm1_se11 ## not sig

# Outcome: share_workfam
lm12 <- lm(
  share_workfam ~ lag1_farright_vote + partyfam3 +
    lag1_farright_vote:partyfam3 + recession2 + femaleleader2 +
    cabinet_party2_lag + year + total_sen + as.factor(countryname),
  data = test5
)
summary(lm12)
vcov_party12 <- cluster.vcov(model = lm12, cluster = test5$party)
lm1_se12 <- as.matrix(coeftest(lm12, vcov. = vcov_party12))
lm1_se12 ## not sig

# Outcome: share_vaw
lm13 <- lm(
  share_vaw ~ lag1_farright_vote + partyfam3 +
    lag1_farright_vote:partyfam3 + recession2 + femaleleader2 +
    cabinet_party2_lag + year + total_sen + as.factor(countryname),
  data = test5
)
summary(lm13)
vcov_party13 <- cluster.vcov(model = lm13, cluster = test5$party)
lm1_se13 <- as.matrix(coeftest(lm13, vcov. = vcov_party13))
lm1_se13 ## not sig

# Outcome: share_reprights
lm14 <- lm(
  share_reprights ~ lag1_farright_vote + partyfam3 +
    lag1_farright_vote:partyfam3 + recession2 + femaleleader2 +
    cabinet_party2_lag + year + total_sen + as.factor(countryname),
  data = test5
)
summary(lm14) ## social dem increase rep rights attn
vcov_party14 <- cluster.vcov(model = lm14, cluster = test5$party)
lm1_se14 <- as.matrix(coeftest(lm14, vcov. = vcov_party14))
lm1_se14 ## social dem pos and sig at .05; right-wing parties also pos and sig at .01

# Outcome: share_sexuality
lm15 <- lm(
  share_sexuality ~ lag1_farright_vote + partyfam3 +
    lag1_farright_vote:partyfam3 + recession2 + femaleleader2 +
    cabinet_party2_lag + year + total_sen + as.factor(countryname),
  data = test5
)
summary(lm15)
vcov_party15 <- cluster.vcov(model = lm15, cluster = test5$party)
lm1_se15 <- as.matrix(coeftest(lm15, vcov. = vcov_party15))
lm1_se15 ## not sig

# Outcome: gender_pos
lm16 <- lm(
  gender_pos ~ lag1_farright_vote + partyfam3 +
    lag1_farright_vote:partyfam3 + recession2 + femaleleader2 +
    cabinet_party2_lag + year + total_sen + as.factor(countryname),
  data = test5
)
summary(lm16)
vcov_party16 <- cluster.vcov(model = lm16, cluster = test5$party)
lm1_se16 <- as.matrix(coeftest(lm16, vcov. = vcov_party16))
lm1_se16 ## not sig

###########################
###########################
## Appendix ###############

###########################
### Table A1: List of parties included by date
###########################

# Observations per country, then party-by-election-date tables per country,
# all on the full party-level data (`test3`).
table(test3$countryname)
table(
  test3$partyname[test3$countryname == "Portugal"],
  test3$date[test3$countryname == "Portugal"]
)
table(
  test3$partyname[test3$countryname == "Spain"],
  test3$date[test3$countryname == "Spain"]
)
table(
  test3$partyname[test3$countryname == "Greece"],
  test3$date[test3$countryname == "Greece"]
)

### Portugal: 44 observations; 61 percent (27/44) come from parties present
### over the whole period.
###   PCP, PS and PSD are present in all 9 elections.
###   Left Bloc and CDS are present in 7 of 9.
###   Chega enters in 2019 (2 obs); LI enters in 2022 (1 obs).

### Greece: 47 observations; 20/47 come from parties present over the whole
### period (New Democracy and Syriza).
### Spain: 45 observations; 18 come from parties present over the whole period.
### In total: 65/136.

###########################
################### Table A2: Summary statistics
###########################

library(stargazer)

# Baseline variable set; rows with any missing value are dropped before
# summarising.
vars <- c(
  "share_gender_all", "share_gender_eq", "share_workfam",
  "share_vaw", "share_reprights", "share_sexuality",
  "femaleleader2", "cabinet_party2_lag", "recession2", "total_sen"
)
stats <- na.omit(test3[vars])
head(stats)
nrow(stats)
stargazer(stats)

### Same, adding gender position (smaller N)
vars <- c(
  "share_gender_all", "share_gender_eq", "share_workfam",
  "share_vaw", "share_reprights", "share_sexuality",
  "femaleleader2", "cabinet_party2_lag", "recession2",
  "gender_pos", "total_sen"
)
stats <- na.omit(test3[vars])
head(stats)
nrow(stats)
stargazer(stats)

### Same, adding lagged far-right vote instead (smaller N)
vars <- c(
  "share_gender_all", "share_gender_eq", "share_workfam",
  "share_vaw", "share_reprights", "share_sexuality",
  "femaleleader2", "cabinet_party2_lag", "recession2",
  "lag1_farright_vote", "total_sen"
)
stats <- na.omit(test3[vars])
head(stats)
nrow(stats)
stargazer(stats)

## Mean share of gender-related sentences by country

with(test3, mean(share_gender_all[countryname == "Greece"]))
with(test3, mean(share_gender_all[countryname == "Spain"]))
with(test3, mean(share_gender_all[countryname == "Portugal"]))

########### Table A3 ##################
#### Exclude control variables -- focus on party family
#######################################

# Table 1 outcomes regressed on the party-family variables and country
# indicators only; coefficient tests use party-clustered variance matrices.

# Outcome: share_gender_all
lm1a <- lm(
  share_gender_all ~ comm_leftlib + socialdem + con_chdem + farright +
    as.factor(countryname),
  data = test4
)
summary(lm1a)
vcov_party1a <- cluster.vcov(model = lm1a, cluster = test4$party)
lm1_se1a <- as.matrix(coeftest(lm1a, vcov. = vcov_party1a))
lm1_se1a

# Outcome: share_gender_eq
lm2a <- lm(
  share_gender_eq ~ comm_leftlib + socialdem + con_chdem + farright +
    as.factor(countryname),
  data = test4
)
summary(lm2a)
vcov_party2a <- cluster.vcov(model = lm2a, cluster = test4$party)
lm1_se2a <- as.matrix(coeftest(lm2a, vcov. = vcov_party2a))
lm1_se2a

# Outcome: share_workfam
lm3a <- lm(
  share_workfam ~ comm_leftlib + socialdem + con_chdem + farright +
    as.factor(countryname),
  data = test4
)
summary(lm3a)
vcov_party3a <- cluster.vcov(model = lm3a, cluster = test4$party)
lm1_se3a <- as.matrix(coeftest(lm3a, vcov. = vcov_party3a))
lm1_se3a

# Outcome: share_vaw
lm4a <- lm(
  share_vaw ~ comm_leftlib + socialdem + con_chdem + farright +
    as.factor(countryname),
  data = test4
)
summary(lm4a)
vcov_party4a <- cluster.vcov(model = lm4a, cluster = test4$party)
lm1_se4a <- as.matrix(coeftest(lm4a, vcov. = vcov_party4a))
lm1_se4a

# Outcome: share_reprights
lm5a <- lm(
  share_reprights ~ comm_leftlib + socialdem + con_chdem + farright +
    as.factor(countryname),
  data = test4
)
summary(lm5a)
vcov_party5a <- cluster.vcov(model = lm5a, cluster = test4$party)
lm1_se5a <- as.matrix(coeftest(lm5a, vcov. = vcov_party5a))
lm1_se5a

# Outcome: share_sexuality
lm6a <- lm(
  share_sexuality ~ comm_leftlib + socialdem + con_chdem + farright +
    as.factor(countryname),
  data = test4
)
summary(lm6a)
vcov_party6a <- cluster.vcov(model = lm6a, cluster = test4$party)
lm1_se6a <- as.matrix(coeftest(lm6a, vcov. = vcov_party6a))
lm1_se6a

# Outcome: gender_pos
lm7a <- lm(
  gender_pos ~ comm_leftlib + socialdem + con_chdem + farright +
    as.factor(countryname),
  data = test4
)
summary(lm7a)
vcov_party7a <- cluster.vcov(model = lm7a, cluster = test4$party)
lm1_se7a <- as.matrix(coeftest(lm7a, vcov. = vcov_party7a))
lm1_se7a

########### Table A4 ##################
#### Exclude control variables -- focus on recession
#######################################

# Table 1 outcomes regressed on recession2 and country indicators only;
# coefficient tests use party-clustered variance matrices.

# Outcome: share_gender_all
lm1b <- lm(
  share_gender_all ~ recession2 + as.factor(countryname),
  data = test4
)
summary(lm1b)
vcov_party1b <- cluster.vcov(model = lm1b, cluster = test4$party)
lm1_se1b <- as.matrix(coeftest(lm1b, vcov. = vcov_party1b))
lm1_se1b

# Outcome: share_gender_eq
lm2b <- lm(
  share_gender_eq ~ recession2 + as.factor(countryname),
  data = test4
)
summary(lm2b)
vcov_party2b <- cluster.vcov(model = lm2b, cluster = test4$party)
lm1_se2b <- as.matrix(coeftest(lm2b, vcov. = vcov_party2b))
lm1_se2b

# Outcome: share_workfam
lm3b <- lm(
  share_workfam ~ recession2 + as.factor(countryname),
  data = test4
)
summary(lm3b)
vcov_party3b <- cluster.vcov(model = lm3b, cluster = test4$party)
lm1_se3b <- as.matrix(coeftest(lm3b, vcov. = vcov_party3b))
lm1_se3b

# Outcome: share_vaw
lm4b <- lm(
  share_vaw ~ recession2 + as.factor(countryname),
  data = test4
)
summary(lm4b)
vcov_party4b <- cluster.vcov(model = lm4b, cluster = test4$party)
lm1_se4b <- as.matrix(coeftest(lm4b, vcov. = vcov_party4b))
lm1_se4b

# Outcome: share_reprights
lm5b <- lm(
  share_reprights ~ recession2 + as.factor(countryname),
  data = test4
)
summary(lm5b)
vcov_party5b <- cluster.vcov(model = lm5b, cluster = test4$party)
lm1_se5b <- as.matrix(coeftest(lm5b, vcov. = vcov_party5b))
lm1_se5b

# Outcome: share_sexuality
lm6b <- lm(
  share_sexuality ~ recession2 + as.factor(countryname),
  data = test4
)
summary(lm6b)
vcov_party6b <- cluster.vcov(model = lm6b, cluster = test4$party)
lm1_se6b <- as.matrix(coeftest(lm6b, vcov. = vcov_party6b))
lm1_se6b

# Outcome: gender_pos
lm7b <- lm(
  gender_pos ~ recession2 + as.factor(countryname),
  data = test4
)
summary(lm7b)
vcov_party7b <- cluster.vcov(model = lm7b, cluster = test4$party)
lm1_se7b <- as.matrix(coeftest(lm7b, vcov. = vcov_party7b))
lm1_se7b

############ Table A8 #################
### And now Table 2 with no controls
#######################################

### Drop far-right parties (rows with farright == 0 are kept)

test5 <- test3[which(test3$farright == 0), ]
nrow(test5)
## 124

table(test4$lag1_farright_vote[test4$partyfam == "Liberal"])

# Fold "Liberal" into "Conservative/Christian Dem."; the first factor level,
# "Communist / Left Lib.", is the reference category.
test5$partyfam3 <- replace(
  test5$partyfam,
  test5$partyfam == "Liberal",
  "Conservative/Christian Dem."
)
table(test5$partyfam3)
test5$partyfam3 <- factor(
  test5$partyfam3,
  levels = c("Communist / Left Lib.", "Conservative/Christian Dem.", "Social Dem.")
)

# Outcome: share_gender_all
lm10a <- lm(
  share_gender_all ~ lag1_farright_vote + partyfam3 +
    lag1_farright_vote:partyfam3 + as.factor(countryname),
  data = test5
)
summary(lm10a) ## null
vcov_party10a <- cluster.vcov(model = lm10a, cluster = test5$party)
lm1_se10a <- as.matrix(coeftest(lm10a, vcov. = vcov_party10a))
lm1_se10a ## findings hold

# Outcome: share_gender_eq
lm11a <- lm(
  share_gender_eq ~ lag1_farright_vote + partyfam3 +
    lag1_farright_vote:partyfam3 + as.factor(countryname),
  data = test5
)
summary(lm11a) ## null
vcov_party11a <- cluster.vcov(model = lm11a, cluster = test5$party)
lm1_se11a <- as.matrix(coeftest(lm11a, vcov. = vcov_party11a))
lm1_se11a

# Outcome: share_workfam
lm12a <- lm(
  share_workfam ~ lag1_farright_vote + partyfam3 +
    lag1_farright_vote:partyfam3 + as.factor(countryname),
  data = test5
)
summary(lm12a)
vcov_party12a <- cluster.vcov(model = lm12a, cluster = test5$party)
lm1_se12a <- as.matrix(coeftest(lm12a, vcov. = vcov_party12a))
lm1_se12a

# Outcome: share_vaw
lm13a <- lm(
  share_vaw ~ lag1_farright_vote + partyfam3 +
    lag1_farright_vote:partyfam3 + as.factor(countryname),
  data = test5
)
summary(lm13a)
vcov_party13a <- cluster.vcov(model = lm13a, cluster = test5$party)
lm1_se13a <- as.matrix(coeftest(lm13a, vcov. = vcov_party13a))
lm1_se13a

# Outcome: share_reprights
lm14a <- lm(
  share_reprights ~ lag1_farright_vote + partyfam3 +
    lag1_farright_vote:partyfam3 + as.factor(countryname),
  data = test5
)
summary(lm14a) ## social dem increase rep rights attn
vcov_party14a <- cluster.vcov(model = lm14a, cluster = test5$party)
lm1_se14a <- as.matrix(coeftest(lm14a, vcov. = vcov_party14a))
lm1_se14a ## interactions are both still sig

# Outcome: share_sexuality
lm15a <- lm(
  share_sexuality ~ lag1_farright_vote + partyfam3 +
    lag1_farright_vote:partyfam3 + as.factor(countryname),
  data = test5
)
summary(lm15a)
vcov_party15a <- cluster.vcov(model = lm15a, cluster = test5$party)
lm1_se15a <- as.matrix(coeftest(lm15a, vcov. = vcov_party15a))
lm1_se15a

# Outcome: gender_pos
lm16a <- lm(
  gender_pos ~ lag1_farright_vote + partyfam3 +
    lag1_farright_vote:partyfam3 + as.factor(countryname),
  data = test5
)
summary(lm16a)
vcov_party16a <- cluster.vcov(model = lm16a, cluster = test5$party)
lm1_se16a <- as.matrix(coeftest(lm16a, vcov. = vcov_party16a))
lm1_se16a


######################
#### Dropping one country at a time (iterate)
######################

# The three assignments below overwrite each other, so when the script runs
# top to bottom only the LAST one (Spain dropped) feeds the models that
# follow. To reproduce the Greece- or Portugal-dropped results, rerun the
# models after executing only the corresponding assignment.
test5 <- test4[which(test4$countryname != "Greece"), ]
nrow(test5) ## 89
test5 <- test4[which(test4$countryname != "Portugal"), ]
nrow(test5) ## 92
test5 <- test4[which(test4$countryname != "Spain"), ]
nrow(test5) ## 91

# Table 1 specifications re-estimated on the reduced sample. These overwrite
# the lm1-lm7, vcov_party1-7 and lm1_se1-7 objects created for Table 1.

lm1 <- lm(
  share_gender_all ~ comm_leftlib + socialdem + con_chdem + farright +
    recession2 + femaleleader2 + cabinet_party2_lag + year + total_sen +
    as.factor(countryname),
  data = test5
)
summary(lm1)
vcov_party1 <- cluster.vcov(lm1, test5$party)
lm1_se1 <- as.matrix(coeftest(lm1, vcov_party1))
lm1_se1 ## findings hold dropping Greece, Portugal; recession finding holds when dropping Spain but party families no longer sig

lm2 <- lm(
  share_gender_eq ~ comm_leftlib + socialdem + con_chdem + farright +
    recession2 + femaleleader2 + cabinet_party2_lag + year + total_sen +
    as.factor(countryname),
  data = test5
)
summary(lm2)
vcov_party2 <- cluster.vcov(lm2, test5$party)
lm1_se2 <- as.matrix(coeftest(lm2, vcov_party2))
lm1_se2 ## findings hold on dropping Greece, Portugal; recession finding holds when dropping Spain but party families no longer sig

lm3 <- lm(
  share_workfam ~ comm_leftlib + socialdem + con_chdem + farright +
    recession2 + femaleleader2 + cabinet_party2_lag + year + total_sen +
    as.factor(countryname),
  data = test5
)
summary(lm3) ## female leader is no longer sig when dropping Greece
vcov_party3 <- cluster.vcov(lm3, test5$party)
lm1_se3 <- as.matrix(coeftest(lm3, vcov_party3))
lm1_se3 ## On dropping Greece, recession and women leader no longer significant; on dropping Portugal or Spain findings hold

lm4 <- lm(
  share_vaw ~ comm_leftlib + socialdem + con_chdem + farright +
    recession2 + femaleleader2 + cabinet_party2_lag + year + total_sen +
    as.factor(countryname),
  data = test5
)
summary(lm4)
vcov_party4 <- cluster.vcov(lm4, test5$party)
lm1_se4 <- as.matrix(coeftest(lm4, vcov_party4))
lm1_se4 ## On dropping Greece or Spain, recession no longer significant; robust to excluding Portugal

lm5 <- lm(
  share_reprights ~ comm_leftlib + socialdem + con_chdem + farright +
    recession2 + femaleleader2 + cabinet_party2_lag + year + total_sen +
    as.factor(countryname),
  data = test5
)
summary(lm5) ## finding for RR holds on dropping greece, portugal
vcov_party5 <- cluster.vcov(lm5, test5$party)
lm1_se5 <- as.matrix(coeftest(lm5, vcov_party5))
lm1_se5 ## On dropping Greece or Spain, far right no longer significant; robust to excluding Portugal

lm6 <- lm(
  share_sexuality ~ comm_leftlib + socialdem + con_chdem + farright +
    recession2 + femaleleader2 + cabinet_party2_lag + year + total_sen +
    as.factor(countryname),
  data = test5
)
summary(lm6) ## link between comm/left lib and attn holds on dropping greece, portugal -- but not dropping Spain
vcov_party6 <- cluster.vcov(lm6, test5$party)
# Fixed: this test previously used vcov_party2 (the share_gender_eq model's
# variance matrix) instead of the matrix computed for lm6 on the line above.
lm1_se6 <- as.matrix(coeftest(lm6, vcov_party6))
lm1_se6 ## on dropping Greece or Portugal or Spain, comm/left lib no longer sig
## NOTE(review): the result note above was recorded with the wrong variance
## matrix in place -- re-check it against the corrected output.

lm7 <- lm(
  gender_pos ~ comm_leftlib + socialdem + con_chdem + farright +
    recession2 + femaleleader2 + cabinet_party2_lag + year + total_sen +
    as.factor(countryname),
  data = test5
)
summary(lm7) # far right loses significance on dropping spain
vcov_party7 <- cluster.vcov(lm7, test5$party)
lm1_se7 <- as.matrix(coeftest(lm7, vcov_party7))
lm1_se7 ##  results hold on dropping Greece, Portugal, or Spain (far right sig at .1 level)

# Table 2 specifications on the country-reduced sample: start from `test5`
# (one country removed) and drop far-right parties.
test6 <- test5[which(test5$farright == 0), ]
nrow(test6)
##

### Use 3 party families. Per the authors' note, this is needed because radical
### right parties did not enter parliament in the years when liberal parties
### were strong in any of the countries, so liberal parties show no variation
### from before to after the radical right's entry.
table(test4$lag1_farright_vote[test4$partyfam == "Liberal"])

# Fold "Liberal" into "Conservative/Christian Dem."; the first factor level,
# "Communist / Left Lib.", is the reference category.
test6$partyfam3 <- replace(
  test6$partyfam,
  test6$partyfam == "Liberal",
  "Conservative/Christian Dem."
)
table(test6$partyfam3)
test6$partyfam3 <- factor(
  test6$partyfam3,
  levels = c("Communist / Left Lib.", "Conservative/Christian Dem.", "Social Dem.")
)

# Outcome: share_gender_all (with party-clustered coefficient tests)
lm10 <- lm(
  share_gender_all ~ lag1_farright_vote + partyfam3 +
    lag1_farright_vote:partyfam3 + recession2 + femaleleader2 +
    cabinet_party2_lag + year + total_sen + as.factor(countryname),
  data = test6
)
summary(lm10) ## null
vcov_party10 <- cluster.vcov(model = lm10, cluster = test6$party)
lm1_se10 <- as.matrix(coeftest(lm10, vcov. = vcov_party10))
lm1_se10 ##  on dropping Spain, Social Dem interaction no longer sig

# Outcome: share_gender_eq
lm11 <- lm(
  share_gender_eq ~ lag1_farright_vote + partyfam3 +
    lag1_farright_vote:partyfam3 + recession2 + femaleleader2 +
    cabinet_party2_lag + year + total_sen + as.factor(countryname),
  data = test6
)
summary(lm11) ## pos interaction between social dem and rr vote. holds on dropping greece, but not on dropping Portugal

# Outcome: share_workfam
lm12 <- lm(
  share_workfam ~ lag1_farright_vote + partyfam3 +
    lag1_farright_vote:partyfam3 + recession2 + femaleleader2 +
    cabinet_party2_lag + year + total_sen + as.factor(countryname),
  data = test6
)
summary(lm12)

# Outcome: share_vaw
lm13 <- lm(
  share_vaw ~ lag1_farright_vote + partyfam3 +
    lag1_farright_vote:partyfam3 + recession2 + femaleleader2 +
    cabinet_party2_lag + year + total_sen + as.factor(countryname),
  data = test6
)
summary(lm13)

# Outcome: share_reprights (with party-clustered coefficient tests)
lm14 <- lm(
  share_reprights ~ lag1_farright_vote + partyfam3 +
    lag1_farright_vote:partyfam3 + recession2 + femaleleader2 +
    cabinet_party2_lag + year + total_sen + as.factor(countryname),
  data = test6
)
summary(lm14) ## social dem increase rep rights attn -- holds when dropping Greece, borderline significant on dropping Portugal (p=.11)
vcov_party14 <- cluster.vcov(model = lm14, cluster = test6$party)
lm1_se14 <- as.matrix(coeftest(lm14, vcov. = vcov_party14))
lm1_se14 ##  On dropping Spain, Social Dem no longer sig

# Outcome: share_sexuality
lm15 <- lm(
  share_sexuality ~ lag1_farright_vote + partyfam3 +
    lag1_farright_vote:partyfam3 + recession2 + femaleleader2 +
    cabinet_party2_lag + year + total_sen + as.factor(countryname),
  data = test6
)
summary(lm15)

# Outcome: gender_pos
lm16 <- lm(
  gender_pos ~ lag1_farright_vote + partyfam3 +
    lag1_farright_vote:partyfam3 + recession2 + femaleleader2 +
    cabinet_party2_lag + year + total_sen + as.factor(countryname),
  data = test6
)
summary(lm16)



######################
### Tests for serial correlation -- Lagrange Multiplier Test   ##########
######################

library(plm)
library(lmtest)

# Pooled panel versions of the Table 1 models, indexed by party and election
# date, followed by plmtest() on each.

plm1 <- plm(
  share_gender_all ~ comm_leftlib + socialdem + con_chdem + farright +
    recession2 + femaleleader2 + cabinet_party2_lag + year + total_sen +
    as.factor(countryname),
  data = test4, index = c("party", "date"), model = "pooling"
)
summary(plm1)

plm2 <- plm(
  share_gender_eq ~ comm_leftlib + socialdem + con_chdem + farright +
    recession2 + femaleleader2 + cabinet_party2_lag + year + total_sen +
    as.factor(countryname),
  data = test4, index = c("party", "date"), model = "pooling"
)
summary(plm2)

plm3 <- plm(
  share_workfam ~ comm_leftlib + socialdem + con_chdem + farright +
    recession2 + femaleleader2 + cabinet_party2_lag + year + total_sen +
    as.factor(countryname),
  data = test4, index = c("party", "date"), model = "pooling"
)
summary(plm3)

plm4 <- plm(
  share_vaw ~ comm_leftlib + socialdem + con_chdem + farright +
    recession2 + femaleleader2 + cabinet_party2_lag + year + total_sen +
    as.factor(countryname),
  data = test4, index = c("party", "date"), model = "pooling"
)
summary(plm4)

plm5 <- plm(
  share_reprights ~ comm_leftlib + socialdem + con_chdem + farright +
    recession2 + femaleleader2 + cabinet_party2_lag + year + total_sen +
    as.factor(countryname),
  data = test4, index = c("party", "date"), model = "pooling"
)
summary(plm5)

plm6 <- plm(
  share_sexuality ~ comm_leftlib + socialdem + con_chdem + farright +
    recession2 + femaleleader2 + cabinet_party2_lag + year + total_sen +
    as.factor(countryname),
  data = test4, index = c("party", "date"), model = "pooling"
)
summary(plm6)

plm7 <- plm(
  gender_pos ~ comm_leftlib + socialdem + con_chdem + farright +
    recession2 + femaleleader2 + cabinet_party2_lag + year + total_sen +
    as.factor(countryname),
  data = test4, index = c("party", "date"), model = "pooling"
)
summary(plm7)

# NOTE(review): the plm documentation describes plmtest(type = "bp") as the
# Breusch-Pagan Lagrange multiplier test for individual/time effects; confirm
# that this is the intended diagnostic for serial correlation (plm also has
# dedicated serial-correlation tests). The result notes below are the authors'.
plmtest(plm1, type = "bp") ## p-value above .05 = no evidence of serial correlation
plmtest(plm2, type = "bp") ## p-value above .05 = no evidence of serial correlation
plmtest(plm3, type = "bp") ## p-value above .05 = no evidence of serial correlation
plmtest(plm4, type = "bp") ## p-value above .05 = no evidence of serial correlation
plmtest(plm5, type = "bp") ## p-value above .05 = no evidence of serial correlation
plmtest(plm6, type = "bp") ## p-value below .05 = evidence of serial correlation
plmtest(plm7, type = "bp") ## p-value below .05 = evidence of serial correlation

########################
##### Topic models
########################

# Load (installing if necessary) the packages used for the text analysis.
# The package order is kept as-is because it determines which package's
# functions mask which.
pacman::p_load(
  ggplot2, tidyr, dplyr, haven, cowplot, plyr, stargazer, xtable, plm, grid,
  coefplot, lubridate, ggpubr, modelsummary, stringi,
  quanteda, tidyverse, topicmodels, ldatuning, stm, wordcloud, reshape2, tibble
)

# Default ggplot2 theme and random seed for this section
theme_set(theme_bw())
set.seed(12345)

########################
####################### Create FIGURE 4

# Sentence-level data
tp <- read.csv(file = "sentence_level_cleaned_v3.csv")

# Hand corrections of the party / date codes for four manifestos.
# NOTE(review): presumably these align the keys with the party-level data for
# the merge on party and date -- confirm.
tp$party[tp$partyname == "PSD" & tp$year == 2015] <- 35313
tp$date[tp$partyname == "PP" & tp$year == 2004] <- 200403
tp$date[tp$partyname == "CiU" & tp$year == 2011] <- 201111
tp$date[tp$partyname == "New Democracy" & tp$year == 2007] <- 200709

# Text clean-up of the translated sentences: the substitutions are applied one
# after another in exactly the order listed (later rules see the output of
# earlier ones). The " 89dear " rule is deliberately listed twice, as in the
# original sequence.
tp$translated <- Reduce(
  f = function(text, rule) {
    gsub(pattern = rule[[1]], replacement = rule[[2]], x = text)
  },
  x = list(
    c("\n", " "),
    c(" s ", " "),
    c("s ", " "),
    c("educationmandatory ", ""),
    c(" thi ", ""),
    c(" tothey", ""),
    c(" nationalpopular", ""),
    c(" n ", " "),
    c(" ha ", " "),
    c(" wa ", " "),
    c(" 89dear ", " "),
    c(" 89dear ", " ")
  ),
  init = tp$translated
)

# Transliterate to ASCII, then turn every non-alphanumeric character into a space
tp$translated <- stringi::stri_trans_general(tp$translated, "latin-ascii")
tp$translated <- str_replace_all(tp$translated, "[^[:alnum:]]", " ")

# Reload the party-level data (provides `test3`)
load(file = "party_level_cleaned_mar16.RData")

# Drop columns before merging: partyname / countryname / year from the
# party-level side, and the second-coder comments from the sentence-level side.
stm_t <- subset(test3, select = -c(partyname, countryname, year))
tp <- subset(tp, select = -c(Comments...second.coder))

# Left join: keep every sentence and attach party-level variables by party
# and election date.
combine <- merge(x = tp, y = stm_t, by = c("party", "date"), all.x = TRUE)

table(combine$partyfam)

# Party-family label with "Radical Right" renamed to "Far-Right"
combine$partyfam2 <- replace(
  combine$partyfam,
  combine$partyfam == "Radical Right",
  "Far-Right"
)
table(combine$partyfam2)

######### Fit one topic model per party family
########## "Communist / Left Lib."
com_left <- combine[which(combine$partyfam2 == "Communist / Left Lib."), , drop = FALSE]

# Tokenise into words (dropping punctuation, numbers and URLs), lower-case,
# and remove English stopwords.
tokens <- tokens(
  com_left$translated,
  what = "word",
  remove_punct = TRUE,
  remove_numbers = TRUE,
  remove_url = TRUE
)
tokens <- tokens_tolower(tokens)
tokens <- tokens_remove(tokens, stopwords("english"))

# Document-feature matrix, keeping terms whose document share lies between
# 0.5% and 99%.
dfm <- dfm(tokens)
dfm <- dfm_trim(
  dfm,
  min_docfreq = 0.005, max_docfreq = 0.99,
  docfreq_type = "prop", verbose = TRUE
)

# 10-topic structural topic model
dfm_stm <- convert(dfm, to = "stm")
model_1 <- stm(
  documents = dfm_stm$documents,
  vocab = dfm_stm$vocab,
  K = 10,
  verbose = TRUE
)
par(bty = "n", col = "grey40", lwd = 5)
plot(model_1)

########## "Social Dem."
social_dem <- combine[which(combine$partyfam2 == "Social Dem."), , drop = FALSE]

# Tokenise into words (dropping punctuation, numbers and URLs), lower-case,
# and remove English stopwords.
tokens <- tokens(
  social_dem$translated,
  what = "word",
  remove_punct = TRUE,
  remove_numbers = TRUE,
  remove_url = TRUE
)
tokens <- tokens_tolower(tokens)
tokens <- tokens_remove(tokens, stopwords("english"))

# Document-feature matrix, keeping terms whose document share lies between
# 0.5% and 99%.
dfm <- dfm(tokens)
dfm <- dfm_trim(
  dfm,
  min_docfreq = 0.005, max_docfreq = 0.99,
  docfreq_type = "prop", verbose = TRUE
)

# 10-topic structural topic model
dfm_stm <- convert(dfm, to = "stm")
model_2 <- stm(
  documents = dfm_stm$documents,
  vocab = dfm_stm$vocab,
  K = 10,
  verbose = TRUE
)
par(bty = "n", col = "grey40", lwd = 5)
plot(model_2)

########## "Liberal"
liberal <- subset(combine,combine$partyfam2 == "Liberal")
tokens <- liberal$translated %>%
  tokens(what = "word",
         remove_punct = TRUE,
         remove_numbers = TRUE,
         remove_url = TRUE) %>%
  tokens_tolower() %>%
  tokens_remove(stopwords("english"))
dfm <- dfm_trim(dfm(tokens), min_docfreq = 0.005, max_docfreq = 0.99, 
                docfreq_type = "prop", verbose = TRUE)
dfm_stm <- convert(dfm, to = "stm")
model_3 <- stm(documents = dfm_stm$documents,
               vocab = dfm_stm$vocab, 
               K = 10,
               verbose = TRUE)
par(bty="n",col="grey40",lwd=5)
plot(model_3)

########## "Conservative/Christian Dem."
cons_christian <- subset(combine,combine$partyfam2 == "Conservative/Christian Dem.")
tokens <- cons_christian$translated %>%
  tokens(what = "word",
         remove_punct = TRUE,
         remove_numbers = TRUE,
         remove_url = TRUE) %>%
  tokens_tolower() %>%
  tokens_remove(stopwords("english"))
dfm <- dfm_trim(dfm(tokens), min_docfreq = 0.005, max_docfreq = 0.99, 
                docfreq_type = "prop", verbose = TRUE)
dfm_stm <- convert(dfm, to = "stm")
model_4 <- stm(documents = dfm_stm$documents,
               vocab = dfm_stm$vocab, 
               K = 10,
               verbose = TRUE)
par(bty="n",col="grey40",lwd=5)
plot(model_4)

########## "Radical Right"
rad_right <- subset(combine,combine$partyfam2 == "Far-Right")
tokens <- rad_right$translated %>%
  tokens(what = "word",
         remove_punct = TRUE,
         remove_numbers = TRUE,
         remove_url = TRUE) %>%
  tokens_tolower() %>%
  tokens_remove(stopwords("english"))
dfm <- dfm_trim(dfm(tokens), min_docfreq = 0.005, max_docfreq = 0.99, 
                docfreq_type = "prop", verbose = TRUE)
dfm_stm <- convert(dfm, to = "stm")
model_5 <- stm(documents = dfm_stm$documents,
               vocab = dfm_stm$vocab, 
               K = 10,
               verbose = TRUE)
par(bty="n",col="grey40",lwd=5)
plot(model_5)


############# Plot by Party Family
# Writes a five-panel figure (one panel per party family) to
# "plot_by_partyfamily.png". Each panel shows four hand-picked topics of the
# corresponding model, labelled with hand-written topic names.
# NOTE(review): the topic numbers are positions within each fitted model, so
# they need to be re-checked whenever the models above are refit on different
# data or with different settings.

# First plot //// "Communist / Left Lib."
topic_names_1 <- c("Work-Family:", "Women's Emp.:", "Discrimination:", "Education:")
# Second plot //// "Social Dem."
topic_names_2 <- c(" Gender Equality:", "Women's Emp.:", "Education:", "Work-family:")
# Third plot //// "Liberal"
topic_names_3 <- c("Training Programs:", "Gender Equality:", "Maternity Leave:", "Violence against Women:")
# Fourth plot //// "Conservative/Christian Dem."
topic_names_4 <- c("Gender Equality:", " Violence against Women:", "Women's Emp.:", "Maternity Leave:")
# Fifth plot //// "Far-Right"
topic_names_5 <- c("Work-Family:", "Natalism:", "Gender Equality:", "Parental Consent:")

# One entry per panel, in plotting order (top to bottom).
family_panels <- list(
  list(model = model_1, topics = c(8, 1, 9, 6), topic_names = topic_names_1,
       main = "Top Topics Among Communist / Left Lib."),
  list(model = model_2, topics = c(8, 6, 9, 2), topic_names = topic_names_2,
       main = "Top Topics Among Social Dem."),
  list(model = model_3, topics = c(9, 2, 7, 8), topic_names = topic_names_3,
       main = "Top Topics Among Liberal"),
  list(model = model_4, topics = c(9, 4, 8, 2), topic_names = topic_names_4,
       main = "Top Topics Among Conservative/Christian Dem."),
  list(model = model_5, topics = c(7, 8, 4, 1), topic_names = topic_names_5,
       main = "Top Topics Among Far-Right")
)

png("plot_by_partyfamily.png", width = 800, height = 1400)

# The device is closed in `finally`, so a failing panel cannot leave the PNG
# device open and silently capture later plots.
invisible(tryCatch(
  {
    # par() is called after png(), so these settings apply to the PNG device.
    par(mfrow = c(5, 1), lty = 1, lwd = 2, cex.axis = 1.5, cex.lab = 1.5,
        col = "grey26", bty = "n")
    for (panel in family_panels) {
      plot.STM(panel[["model"]], topics = panel[["topics"]],
               labeltype = "prob", n = 4, text.cex = 1.5,
               main = panel[["main"]], xlim = c(0.0, 0.3), cex.main = 2,
               topic.names = panel[["topic_names"]])
    }
  },
  finally = dev.off()
))





